Pupil size light reflex to a light test is a potential test to determine a person is under the influence of THC and be able to be use as a field sobriety measures. The goals of the analysis to:
Pupil size light reflex to light does not habituated to THC use (smoking).
Time for smoking to post test maybe an important variable to model b/c:
There is a circadian pattern to pupil size (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7445830/).
Currently this field does not use FDA, so any FDA in topic area may be publishable
Data on pupil size during light reflex test. Pupil size was extracted at the image level based on image analysis techniques. Each test was performed simultaneously on right and left eye before and after THC use (smoking).
ps <- read.csv(file.path(data_folder, ps_folder, "pupil_data_with_demo.csv"))
ps$demo_gender <- factor(ps$demo_gender,
levels = c(1,2),
labels = c("Male", "Female"))
ps$user_cat <- NA
ps$user_cat[ps$user_type == "non-user"] <- 0
ps$user_cat[ps$user_type == "occasional"] <- 1
ps$user_cat[ps$user_type == "daily"] <- 2
ps$user_cat <- factor(ps$user_cat,
levels = c(0,1,2),
labels = c("non-user",
"occasional",
"daily"))
ps$tp <- NA
ps$tp[ps$time == "pre2"] <- 0
ps$tp[ps$time == "post"] <- 1
ps$tp <- factor(ps$tp,
levels = c(0,1),
labels = c("pre2", "post"))
# str(ps)
#impute frame number
for(i in 2:(nrow(ps)-1)){
if(is.na(ps$frame[i]) & is.na(ps$frame[i+1] - ps$frame[i-1])){
ps$frame[i] <- ps$frame[i-1]+1
}else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) == 2){
ps$frame[i] <- ps$frame[i-1]+1
}else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) > 2){
if(abs(ps$percent_change[i] - ps$percent_change[i-1]) <
abs(ps$percent_change[i+1] - ps$percent_change[i])){
ps$frame[i] <- ps$frame[i-1]+1
}else(ps$frame[i] <- ps$frame[i+1]-1)
}
)
)
}
# total number of subjects
length(unique(ps$subject_id))
## [1] 101
# subject level data
pt.df <- unique(ps[, c("subject_id", "tp", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "thc")])
# # Demographics Table by User Category
# table1(~ demo_age + demo_weight + demo_height + demo_gender|user_cat,
# data = pt.df[pt.df$tp == "pre2", ])
# number of subjects by timepoint (pre/post)
table(pt.df$tp)
##
## pre2 post
## 99 95
There are more subjects in total than the by time point. Indicating incomplete data with some subjects missing “pre” and others missing “post” measurement.
Reshape participant demog data to perserve pre and post thc levels
pt.df.w <- reshape(pt.df,
timevar = "tp",
idvar = c("subject_id", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender"),
direction = "wide")
pt.df.w$thc.post[pt.df.w$user_cat == "non-user" & is.na(pt.df.w$thc.post)] <- 0
pt.df.w$thc_chg <- pt.df.w$thc.post - pt.df.w$thc.pre2
pt.df.w$BMI <- pt.df.w$demo_weight*703/(pt.df.w$demo_height)^2
## Need to work on; time formatted in Excel file.
testTimes <- read.xlsx(file.path(data_folder, ps_folder, "All SafetyScan Times_23Aug2022_revSG.xlsx"),
sheet = "Raw")
testTimes$pre_safetyscan_date_convert <- as.Date(testTimes$pre_safetyscan_date, origin = "1899-12-30")
testTimes$pre_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$pre_safetyscan_time_hr, ":", testTimes$pre_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_start_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_start_time_hr, ":", testTimes$consump_start_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_end_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_end_time_hr, ":", testTimes$consump_end_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$post_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$post_safetyscan_time_hr, ":", testTimes$post_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$postConsumpTimeToTest <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
testTimes$consump_end_time_convert,
units = "mins"))
testTimes$remove <- ifelse(testTimes$subject_id == "001-056" & is.na(testTimes$postConsumpTimeToTest), 1, 0)
testTimes <- testTimes[testTimes$remove == 0, ]
pt.df <- merge(pt.df.w, testTimes[, c("subject_id", "postConsumpTimeToTest")],
by = "subject_id")
ps <- merge(ps, testTimes[, c("subject_id", "postConsumpTimeToTest")],
by = "subject_id")
non_user.id <- pt.df$subject_id[pt.df$user_cat == "non-user"]
# View(testTimes[testTimes$subject_id %in% non_user.id, ])
No Consumption end time recorded for non-users
# Demographics Table by User Category
table1(~ demo_age + demo_weight+ demo_height+BMI + demo_gender + thc_chg + postConsumpTimeToTest|user_cat,
data = pt.df)
| non-user (N=32) |
occasional (N=36) |
daily (N=33) |
Overall (N=101) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.9 (4.90) | 31.6 (4.98) | 33.5 (5.69) | 32.6 (5.21) |
| Median [Min, Max] | 33.5 [25.7, 42.2] | 30.1 [25.1, 41.9] | 32.6 [25.4, 45.3] | 32.3 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 171 (48.5) | 173 (33.4) | 176 (33.1) | 173 (38.4) |
| Median [Min, Max] | 165 [85.0, 284] | 170 [105, 240] | 175 [113, 250] | 170 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.83) | 69.6 (3.60) | 68.1 (3.52) | 68.6 (4.04) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.5 [61.0, 76.0] | 69.0 [59.0, 76.0] | 69.0 [59.0, 78.0] |
| BMI | ||||
| Mean (SD) | 25.5 (4.89) | 25.0 (4.02) | 26.7 (4.42) | 25.7 (4.46) |
| Median [Min, Max] | 24.5 [16.6, 34.2] | 24.5 [16.9, 32.6] | 25.8 [18.9, 34.9] | 25.1 [16.6, 34.9] |
| demo_gender | ||||
| Male | 13 (40.6%) | 23 (63.9%) | 18 (54.5%) | 54 (53.5%) |
| Female | 19 (59.4%) | 13 (36.1%) | 15 (45.5%) | 47 (46.5%) |
| thc_chg | ||||
| Mean (SD) | 0 (0) | 7.56 (9.45) | 32.4 (32.7) | 12.7 (23.3) |
| Median [Min, Max] | 0 [0, 0] | 5.30 [0, 46.6] | 20.1 [1.32, 124] | 3.91 [0, 124] |
| Missing | 0 (0%) | 4 (11.1%) | 4 (12.1%) | 8 (7.9%) |
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 64.2 (6.52) | 61.2 (4.87) | 62.7 (5.95) |
| Median [Min, Max] | NA [NA, NA] | 64.0 [54.0, 84.0] | 60.0 [53.0, 74.0] | 62.0 [53.0, 84.0] |
| Missing | 32 (100%) | 0 (0%) | 0 (0%) | 32 (31.7%) |
I’m looking for any sharp declines over 10 frames after the 150th frame and then visualizing those trajectories to determine if there might be misalignment of the light test start frame.
ps$lagPctChg <- c(rep(0, 10), diff(ps$percent_change, lag = 10))
ps$lagID <- dplyr::lag(ps$subject_id, 10)
ps$lagtime <- dplyr::lag(ps$time, 10)
ps$lagEye <- dplyr::lag(ps$eye, 10)
ps$lagPctChg <- ifelse(ps$subject_id == ps$lagID & ps$time == ps$lagtime & ps$eye == ps$lagEye,
ps$lagPctChg, 0)
summary(ps$lagPctChg[ps$frame > 150])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -46.5074 -0.1752 0.2059 0.2355 0.9679 39.8611
hist(ps$lagPctChg[ps$frame > 150], breaks = 1000)
ps.150 <- ps[ps$frame > 150 & ps$eye == "Right", ]
pot.misaligned <- unique(ps.150[ps.150$lagPctChg <= -5, c("subject_id", "time", "user_type", "eye")])
pot.misaligned <- pot.misaligned[!(is.na(pot.misaligned$subject_id)), ]
pot.misaligned$misaligned <- 1
misaligned <- merge(ps, pot.misaligned,
by= c("subject_id", "time", "user_type", "eye"),
all.y = T)
mis.right <- misaligned[misaligned$eye == "Right", ]
misAlign.id <- unique(misaligned$subject_id)
for(i in misAlign.id){
print(ggplot(mis.right[mis.right$subject_id == i, ],
aes(x=frame, y = percent_change,
group = subject_id, color = i))+
geom_line()+
facet_grid(rows = vars(subject_id), cols = vars(tp))+
labs(title = paste0("Potential MisAligned Subject: ", i))+
theme_bw())
}
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_109.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-109", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_060.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-060", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
### New alignment view
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_007.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-007", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: start at 2nd bump?")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_033.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-033", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_038.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-038", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_043.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-043", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Both look odd, but mainly check pre; maybe review video")+
# theme_bw()
# dev.off()
Remove Outliers
## Remove known outliers
ps$outliers <- 0
ps$outliers[ps$subject_id == "001-109" & ps$tp == "pre2"] <- 1
ps$outliers[ps$subject_id == "001-060" & ps$tp == "post"] <- 1
ps <- ps[ps$outliers == 0, ]
Plot of Pupil Size and Percent Change for “PRE” data by Eye and User Category
pre.df <- ps[ps$tp == "pre2", ]
ggplot(pre.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category")+
theme_bw()
ggplot(pre.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category")+
theme_bw()
Plot of PRE/POST for Right Eye
right.df <- ps[ps$eye == "Right", ]
ggplot(right.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title ="Pupil Size by Pre/Post and THC use category")+
theme_bw()
ggplot(right.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title = "Percent Change by Pre/Post and THC use category")+
theme_bw()
Plots of Left and Right Eye for “POST” data. One pt has negative values for pupil size.
post.df <- ps[ps$tp == "post", ]
ggplot(post.df, aes(x=frame, y = pupil_size, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category")+
theme_bw()
ggplot(post.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category")+
theme_bw()
Plot the Mean and +/- 1 SD functions for the percent change in pupil light reflex data for the pre and post data by THC user category. (Added code to input frame numbers when NA).
right_0.df <- right.df[right.df$frame >= 0, c("subject_id", "tp", "user_cat",
"frame", "percent_change")]
right_0.w <- reshape(right_0.df,
timevar = "frame",
idvar = c("subject_id", "tp", "user_cat"),
direction = "wide")
rownames(right_0.w) <- paste0(right_0.w$subject_id, "_", right_0.w$tp)
pct_chg <- names(right_0.w[, 4:602])
mean_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
mean_fxn.l <- reshape(mean_fxn,
varying = pct_chg,
v.names = "percent_change",
timevar = "frame",
times = pct_chg,
direction = "long")
mean_fxn.l$frame <- as.numeric(gsub("percent_change.", "", mean_fxn.l$frame))
rownames(mean_fxn.l) <- NULL
mean_fxn.l$id <- NULL
names(mean_fxn.l)[names(mean_fxn.l) == "Group.1"] <- "tp"
names(mean_fxn.l)[names(mean_fxn.l) == "Group.2"] <- "user_cat"
mean_fxn.l$grp <- paste0(mean_fxn.l$tp, "_", mean_fxn.l$user_cat)
ggplot(mean_fxn.l, aes(x=frame, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category")+
theme_bw()
## Warning: Removed 449 row(s) containing missing values (geom_path).
sd_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
NA’s are when there’s no data in for that time point and user category. Min frame value for NA is 480.
right_0.fpca <- fpca.face(Y = as.matrix(right_0.w[, 4:602]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0.fpca.pve <- round(right_0.fpca$evalues/sum(right_0.fpca$evalues)*100, 2)
mu <- right_0.fpca$mu
right_sd <- sqrt(right_0.fpca$evalues)
right_Phi <- right_0.fpca$efunctions
df_plot <- data.frame(frame = seq(0, 598, by = 1),
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4])
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
plot_pc1 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0.fpca.pve[1]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc2 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0.fpca.pve[2]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc3 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0.fpca.pve[3]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc4 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0.fpca.pve[4]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc1
PC1 plot: Individuals with low loading on PC1 (-2SD) have less constriction than the average and individuals with a high loading (+2SD) have more constriction. Rebound effect?
plot_pc2
PC2 plot: Overall shape of trajectory & pattern of recovery
Plot individuals with high/low PC2 overall scores.
right_scores <- right_0.fpca$scores
q.95 <- quantile(right_scores[, 2], p = 0.95)
highPC2 <- rownames(right_scores)[right_scores[, 2] > q.95]
highPC2.df <- data.frame(subject_id = substr(highPC2, 1, 7),
tp = substr(highPC2, 9,12))
for(i in 1:nrow(highPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == highPC2.df$subject_id[i] & right_0.df$tp == highPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for high PC2 score --", "Subject:", highPC2.df$subject_id[i], "Timepoint:", highPC2.df$tp[i]))+
theme_bw())
}
q.05 <- quantile(right_scores[, 2], p = 0.05)
lowPC2 <- rownames(right_scores)[right_scores[, 2] < q.05]
lowPC2.df <- data.frame(subject_id = substr(lowPC2, 1, 7),
tp = substr(lowPC2, 9,12))
for(i in 1:nrow(lowPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == lowPC2.df$subject_id[i] & right_0.df$tp == lowPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for low PC2 score --", "Subject:", lowPC2.df$subject_id[i], "Timepoint:", lowPC2.df$tp[i]))+
theme_bw())
}
plot_pc3
plot_pc4
right_0.post <- right_0.df[right_0.df$tp == "post", ]
post.ids <- unique(right_0.post$subject_id)
right_0.post.w <- reshape(right_0.post,
timevar = "frame",
idvar = c("subject_id", "tp", "user_cat"),
direction = "wide")
post.vars <- names(right_0.post.w)[4:ncol(right_0.post.w)]
right_0.pt <- reshape(right_0.df,
timevar = "tp",
idvar = c("subject_id", "user_cat", "frame"),
direction = "wide")
right_0.pt$wiPctChg <- right_0.pt$percent_change.post - right_0.pt$percent_change.pre2
right_0.pt <- right_0.pt[, c("subject_id", "user_cat", "frame", "wiPctChg")]
right_0.pt.w <- reshape(right_0.pt,
timevar = "frame",
idvar = c("subject_id", "user_cat"),
direction = "wide")
# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(right_0.pt.w[, 3:601]))
rowsAllMissing <- names(allMissing)[allMissing == 599]
right_0.pt.w <- right_0.pt.w[!(rownames(right_0.pt.w) %in% rowsAllMissing), ]
rownames(right_0.pt.w) <- paste0(right_0.pt.w$subject_id, "_", right_0.pt.w$user_cat)
analytic.N <- post.ids[post.ids %in% right_0.pt.w$subject_id]
## Filter all datasets to analytic sample
right_0.df <- right_0.df[right_0.df$subject_id %in% analytic.N,]
right_0.post <- right_0.post[right_0.post$subject_id %in% analytic.N, ]
right_0.post.w <- right_0.post.w[right_0.post.w$subject_id %in% analytic.N ,]
row.names(right_0.post.w) <- right_0.post.w$subject_id
right_0.pt <- right_0.pt[right_0.pt$subject_id %in% analytic.N, ]
right_0.pt.w <- right_0.pt.w[right_0.pt.w$subject_id %in% analytic.N, ]
row.names(right_0.pt.w) <- right_0.pt.w$subject_id
No Consumption end time recorded for non-users
# Demographics Table by User Category
table1(~ demo_age + demo_weight+ demo_height+BMI + demo_gender + thc_chg + postConsumpTimeToTest|user_cat,
data = pt.df[pt.df$subject_id %in% analytic.N, ])
| non-user (N=29) |
occasional (N=29) |
daily (N=25) |
Overall (N=83) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.3 (4.70) | 30.8 (4.43) | 32.8 (5.71) | 31.9 (4.95) |
| Median [Min, Max] | 31.1 [25.7, 42.2] | 29.5 [25.1, 41.9] | 32.0 [25.4, 45.3] | 31.0 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 167 (48.8) | 168 (34.0) | 180 (29.8) | 171 (38.8) |
| Median [Min, Max] | 162 [85.0, 284] | 165 [105, 240] | 175 [125, 250] | 168 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.97) | 69.5 (3.91) | 68.5 (3.40) | 68.7 (4.18) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.0 [61.0, 76.0] | 69.0 [63.0, 76.0] | 69.0 [60.0, 78.0] |
| BMI | ||||
| Mean (SD) | 24.9 (4.72) | 24.3 (3.94) | 27.1 (4.26) | 25.4 (4.42) |
| Median [Min, Max] | 23.9 [16.6, 34.1] | 24.3 [16.9, 32.5] | 26.8 [19.0, 34.9] | 24.7 [16.6, 34.9] |
| demo_gender | ||||
| Male | 13 (44.8%) | 19 (65.5%) | 16 (64.0%) | 48 (57.8%) |
| Female | 16 (55.2%) | 10 (34.5%) | 9 (36.0%) | 35 (42.2%) |
| thc_chg | ||||
| Mean (SD) | 0 (0) | 8.29 (9.87) | 29.9 (31.9) | 11.9 (22.1) |
| Median [Min, Max] | 0 [0, 0] | 5.64 [0, 46.6] | 17.8 [1.32, 124] | 3.93 [0, 124] |
| Missing | 0 (0%) | 1 (3.4%) | 0 (0%) | 1 (1.2%) |
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 64.0 (6.37) | 60.2 (3.78) | 62.2 (5.62) |
| Median [Min, Max] | NA [NA, NA] | 64.0 [54.0, 84.0] | 60.0 [53.0, 67.0] | 62.0 [53.0, 84.0] |
| Missing | 29 (100%) | 0 (0%) | 0 (0%) | 29 (34.9%) |
ggplot(right_0.post, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat))+
labs(title ="POST data percent change by THC use category")+
theme_bw()
## Within Subject ### Mean function
Calculate the difference (post-pre) within subjects and plot by THC user category
ggplot(right_0.pt, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
facet_grid(rows = vars(user_cat))+
labs(title ="Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 3309 row(s) containing missing values (geom_path).
Non-user plot seems “centered” around 0 but still showing
heterogeneity. Lots of heterogeneity across user-groups, but a mostly
positive difference.
mean_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 3:601],
list(right_0.pt.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[3:601]
mean_fxn.pt.l <- reshape(mean_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
mean_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", mean_fxn.pt.l$frame))
rownames(mean_fxn.pt.l) <- NULL
mean_fxn.pt.l$id <- NULL
names(mean_fxn.pt.l)[names(mean_fxn.pt.l) == "Group.1"] <- "user_cat"
ggplot(mean_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
labs(title ="Average Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 361 row(s) containing missing values (geom_path).
Difference in trajectories (post-pre), show a distinct difference between non-user and both user groups (up to frame 200), but the differences might not be significant. Harder to distinguish between the occasional and daily user trajectories.
# Check to make sure there are significant numbers in each user category
table(right_0.pt.w$user_cat)
##
## non-user occasional daily
## 29 29 25
sd_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 3:601],
list(right_0.pt.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[3:601]
sd_fxn.pt.l <- reshape(sd_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
sd_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", sd_fxn.pt.l$frame))
rownames(sd_fxn.pt.l) <- NULL
sd_fxn.pt.l$id <- NULL
names(sd_fxn.pt.l)[names(sd_fxn.pt.l) == "Group.1"] <- "user_cat"
sd_fxn.pt.l$wi_percent_change_neg <- -1*sd_fxn.pt.l$wi_percent_change
ggplot(sd_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
geom_line(aes(x=frame, y = wi_percent_change_neg, group = user_cat, color = user_cat),
linetype = "dashed")+
labs(title ="+/- 1 SD Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 422 row(s) containing missing values (geom_path).
## Removed 422 row(s) containing missing values (geom_path).
## number of missing data points in each column
missingnessCol <- apply(right_0.pt.w[, 3:601],
MARGIN = 2,
FUN = function(x) sum(is.na(x)))
sum(missingnessCol == 0) ## 140 columns without any missing data
## [1] 140
sum(missingnessCol == 84) ## 109 columns with all missing data
## [1] 0
sum(missingnessCol <= 68) ## 438 columns where at least 80% of participants have data
## [1] 440
hist(missingnessCol, breaks = 30)
missing.df <- data.frame(frame = seq(0, 598, by =1),
missingness = round(missingnessCol/nrow(right_0.pt.w)*100, 2),
stringsAsFactors = F)
ggplot(missing.df, aes(x=frame, y= missingness))+
geom_line()
Truncate within person difference data to frame 400 where missingness
reaches 50%.
Try to visualize missing data in within person data frame
wi_pct_chg <- names(right_0.pt.w)[3:601]
right_0.pt.l <- reshape(right_0.pt.w,
varying = wi_pct_chg,
v.names = "wi_pct_chg_diff",
timevar = 'frame',
times = wi_pct_chg,
direction = "long")
right_0.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", right_0.pt.l$frame))
rownames(right_0.pt.l) <- NULL
right_0.pt.l$id <- NULL
right_0.pt.l$notMissing <- ifelse(is.na(right_0.pt.l$wi_pct_chg_diff), 0, 1)
ggplot(right_0.pt.l, aes(x = as.factor(frame), y = subject_id, fill = as.factor(notMissing)))+
geom_raster(alpha = 0.8)+
scale_fill_manual(values = c("black", 'white'),
labels = c("Missing", "Present"))+
theme(axis.text.x=element_text(angle = 45, vjust = 1, hjust = 1))
right_0_wi.fpca <- fpca.face(Y = as.matrix(right_0.pt.w[, 3:401]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0_wi.fpca.pve <- round(right_0_wi.fpca$evalues/sum(right_0_wi.fpca$evalues)*100, 2)
mu <- right_0_wi.fpca$mu
right_sd <- sqrt(right_0_wi.fpca$evalues)
right_Phi <- right_0_wi.fpca$efunctions
wi.df_plot <- data.frame(frame = seq(0, 398, by = 1),
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4],
errband5 = 2*right_Phi[, 5]*right_sd[5],
errband6 = 2*right_Phi[, 6]*right_sd[6])
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
wi.plot_pc1 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0_wi.fpca.pve[1]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc2 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0_wi.fpca.pve[2]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc3 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0_wi.fpca.pve[3]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc4 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0_wi.fpca.pve[4]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc5 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband5, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband5, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC5:", "% Var Explained:", right_0_wi.fpca.pve[5]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc6 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband6, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband6, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC6:", "% Var Explained:", right_0_wi.fpca.pve[6]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc1
PC1: Individuals with high scores on PC1 have a weaker minimal constriction at post compared to pre. Individuals with low scores on PC1 have a stronger minimal constriction at post compared to pre.
wi.scores <- as.data.frame(right_0_wi.fpca$scores)
names(wi.scores) <- c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6")
wi.scores$subject_id <- rownames(wi.scores)
# pc_ind_plots <- function(scores.df, raw.df, pc_plot.df, q, pc= "PC1", id = "subject_id", pc.val = 1, pc_plot.var = "wiPctChg", raw.plot.var = "percent_change"){
# q.pc <- quantile(scores.df[, pc], probs = q)
# q.ids <- scores.df[scores.df[, pc] > q.pc, id]
#
# q.df <- pc_plot.df[pc_plot.df[, id] %in% q.ids, ]
#
# colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
#
# print(ggplot(data = pc_plot.df, aes(x=frame, y = mu))+
# geom_line(aes(color = "Pop.Mean"))+
# geom_line(aes(x = frame, y = mu+ pc_plot.df[, 2+pc.val], color = "+2SD"))+
# geom_line(aes(x = frame, y = mu- pc_plot.df[, 2+pc.val], color = "-2SD"))+
# labs(x = "frame",
# y = "Percent Change",
# color = "Legend",
# title = paste0("Individuals in the ", q*100,"th Percentile of PC1 scores"))+
# geom_line(data = q.df, aes(x=frame, y = pc_plot.var, group = id))+
# #scale_color_manual(values = colors)+
# theme_bw())
#
# for(i in q.ids){
# print(ggplot(data = raw.df[raw.df[, id] == i, ],
# aes(x = frame, y = raw.plot.var, group = tp, color = tp))+
# geom_line()+
# labs(title = paste0(i, " Pre/Post: ", q*100 , "th Percentile PC1 scores"))+
# theme_bw())
# }
# }
# pc_ind_plots(scores.df = wi.scores,
# raw.df = right_0.df,
# pc_plot.df = right_0.pt,
# q = 0.95, pc = "PC1", id = "subject_id", pc.val = 1, pc_plot.var = "wiPctChg", raw.plot.var = "percent_change")
q.95 <- quantile(wi.scores$PC1, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC1 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC1 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 234 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC1 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC1, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC1 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC1 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 399 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC1 scores"))+
theme_bw())
}
wi.plot_pc2
PC2: Individuals with high loading on PC2 have a weaker initial constriction at post and a stronger rebound dilation past the 200th frame. Individuals with a low loading on PC2 have a stronger initial constriction at paost and a weaker rebound dilation past the 200th frame.
q.95 <- quantile(wi.scores$PC2, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC2 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC2 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 111 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC2 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC2, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC2 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC2 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 264 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC2 scores"))+
theme_bw())
}
wi.plot_pc3
q.95 <- quantile(wi.scores$PC3, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC3 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC3 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 278 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC3 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC3, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC3 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC3 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 144 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC3 scores"))+
theme_bw())
}
wi.plot_pc4
wi.plot_pc5
wi.plot_pc6
### Within person difference PC explanations
### INVESTIGATE BUMPS IN MEAN Function
## Pre/Post Across subjects
ggplot(mean_fxn.l, aes(x=frame, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
xlim(50, 200)+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category")+
theme_bw()
## Warning: Removed 2688 row(s) containing missing values (geom_path).
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "pre2", 103:105]
## percent_change.100 percent_change.101 percent_change.102
## 001-003_pre2 -39.841378 NA -40.056700
## 001-004_pre2 -20.509286 -20.636732 -20.765188
## 001-007_pre2 -29.728903 -30.372171 -30.985599
## 001-009_pre2 -12.595393 -12.384864 -12.173821
## 001-015_pre2 -27.419396 -27.277259 -27.112306
## 001-029_pre2 -22.422026 -22.324030 -22.238468
## 001-030_pre2 -19.904482 -19.730264 -19.562660
## 001-031_pre2 -26.659914 -26.707617 -26.723738
## 001-032_pre2 -25.607501 -25.790114 -25.972873
## 001-037_pre2 -10.766774 -10.595359 -10.422002
## 001-038_pre2 -14.582329 -14.220651 -13.880713
## 001-039_pre2 -8.698690 -8.442030 -8.215627
## 001-040_pre2 -13.961272 -13.965766 -13.969542
## 001-041_pre2 -22.801896 -23.144297 -23.480081
## 001-042_pre2 -22.334268 -22.256735 -22.200964
## 001-043_pre2 -11.625394 -11.267720 -10.929376
## 001-045_pre2 -24.484764 -23.910475 -23.359554
## 001-046_pre2 -39.228644 -39.117800 -39.005777
## 001-047_pre2 -8.537555 -8.481596 -8.428778
## 001-049_pre2 -20.116576 -19.984262 -19.863703
## 001-050_pre2 -29.870190 -29.993167 -30.145066
## 001-053_pre2 -31.777050 -31.811150 -31.839822
## 001-054_pre2 -19.003078 -18.907631 -18.830007
## 001-055_pre2 -22.701872 -23.326338 -23.938734
## 001-062_pre2 -5.394998 -5.303947 -5.217166
## 001-076_pre2 -43.094073 -42.798501 -42.471884
## 001-077_pre2 -9.357431 -9.313066 -9.234195
## 001-095_pre2 -18.898660 -18.708524 -18.520319
## 001-097_pre2 -9.338060 -9.588896 -9.851689
## 001-099_pre2 -32.517179 -32.697862 -32.884544
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "post", 101:103]
## percent_change.98 percent_change.99 percent_change.100
## 001-003_post -27.753056 -27.913264 -28.065082
## 001-004_post -5.257832 -5.198141 -5.149285
## 001-005_post -21.013709 -20.832539 -20.645492
## 001-007_post -18.738419 -18.445158 -18.149873
## 001-009_post -15.268272 -15.431165 -15.637139
## 001-015_post -23.671938 -23.558792 -23.444028
## 001-029_post -15.334405 -15.258121 -15.180255
## 001-030_post -12.088334 -12.022693 -11.954151
## 001-031_post -32.884257 -32.661430 -32.418305
## 001-032_post -17.526118 -17.369644 -17.229585
## 001-037_post -7.295840 -7.301405 -7.306222
## 001-038_post -14.328061 -14.119133 -13.920714
## 001-039_post -6.300429 -6.357287 -6.401905
## 001-040_post -9.856376 -9.737562 -9.617909
## 001-041_post -11.783820 -11.723059 -11.672176
## 001-042_post -23.864704 -23.870622 -23.857456
## 001-043_post -9.449283 -9.492914 -9.533265
## 001-045_post -46.643347 -46.578226 -46.528424
## 001-046_post -24.943494 -24.785338 -24.634926
## 001-047_post -5.657225 -5.615010 -5.597963
## 001-049_post -12.144504 -12.199826 -12.238348
## 001-050_post -32.971581 -33.069765 -33.122365
## 001-053_post -18.045911 -17.931763 -17.810962
## 001-054_post -27.036703 -27.191479 -27.350672
## 001-055_post -12.115957 -11.958694 -11.809640
## 001-062_post -8.366234 -8.281283 -8.206952
## 001-076_post -51.349015 NA -51.523083
## 001-077_post -3.120474 -3.135075 -3.153623
## 001-097_post -14.253318 -13.977159 -13.698777
## 001-099_post -15.639890 -15.662828 -15.693056
## Within person plots
ggplot(mean_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
xlim(50, 200)+
labs(title ="Average Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 1344 row(s) containing missing values (geom_path).
## Mean function Spike in Occassional user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "occasional", 113:119]
## wiPctChg.112 wiPctChg.113 wiPctChg.114 wiPctChg.115 wiPctChg.116 wiPctChg.117
## 2 10.21026 10.19975 10.18894 9.336816 9.651207 10.6294
## wiPctChg.118
## 2 10.15556
right_0.pt.w[right_0.pt.w$user_cat == "occasional", 116:119]
## wiPctChg.114 wiPctChg.115 wiPctChg.116 wiPctChg.117
## 001-006 11.5010736 11.5256630 11.5441500 11.5567771
## 001-011 13.8610423 13.4068052 12.9843136 12.5971631
## 001-014 21.3107326 21.4515436 21.6131807 21.7968208
## 001-021 -1.1912416 -0.9231421 -0.6790802 -0.4625531
## 001-026 9.7779962 9.7134617 9.6452439 9.5739134
## 001-033 -4.5904810 -4.5847367 -4.5874266 -4.5993762
## 001-061 -3.1586454 -3.2525226 -3.3458427 -3.4373346
## 001-066 -2.9926603 -2.6954025 -2.4078270 -2.1356420
## 001-068 -2.3234722 -2.5620425 -2.8047079 NA
## 001-069 10.3751083 10.3261663 10.2674197 10.2007111
## 001-070 -6.3680231 -6.6819575 -6.9642840 -7.2115993
## 001-073 1.0988326 0.8742189 0.6410650 0.3974723
## 001-074 34.3975966 NA 34.2126412 33.9469985
## 001-079 1.5458615 1.5596127 1.5732513 1.5872532
## 001-098 13.7185797 13.7309383 13.7531044 13.7858458
## 001-103 13.3247626 13.1186223 12.9151765 12.7150573
## 001-104 12.7854841 13.1440297 13.4777229 13.7848223
## 001-105 5.5596362 5.0371812 4.5128967 3.9884444
## 001-106 31.6685053 31.4176485 31.1680513 30.9234781
## 001-107 13.0855226 13.1838102 13.2833605 13.3842042
## 001-108 0.6176109 0.8447543 1.0849742 1.3358006
## 001-110 20.8014945 20.6608085 20.5292836 20.4062968
## 001-111 24.6366830 24.5955681 NA 24.5397456
## 001-112 5.4392987 5.2952310 5.1482113 5.0007441
## 001-113 4.3581125 4.6115089 4.8917672 5.1963956
## 001-114 8.3133753 NA 9.0936072 9.6649670
## 001-116 9.5616376 10.2337297 10.9367739 11.6657409
## 001-117 26.0633785 25.9994142 25.9175125 25.8186124
## 001-118 22.3015542 22.0631247 21.8292447 21.6023908
## Mean function Spike in Daily user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "daily", 133:149]
## wiPctChg.132 wiPctChg.133 wiPctChg.134 wiPctChg.135 wiPctChg.136 wiPctChg.137
## 3 9.277331 9.745381 9.979741 9.761392 9.535792 10.07879
## wiPctChg.138 wiPctChg.139 wiPctChg.140 wiPctChg.141 wiPctChg.142 wiPctChg.143
## 3 10.10521 10.12753 9.170105 9.967127 10.84453 12.05176
## wiPctChg.144 wiPctChg.145 wiPctChg.146 wiPctChg.147 wiPctChg.148
## 3 9.733335 10.18234 10.17635 10.1437 10.15056
right_0.pt.w[right_0.pt.w$user_cat == "daily", 143:146]
## wiPctChg.141 wiPctChg.142 wiPctChg.143 wiPctChg.144
## 001-002 NA 14.7153791 14.601849 14.495408
## 001-008 14.7396420 14.7359309 14.729108 14.719145
## 001-010 11.4783127 11.7425889 12.013432 12.288979
## 001-012 33.4153783 33.3457661 33.274294 33.200908
## 001-013 1.9760770 NA 2.017467 2.002911
## 001-018 0.7302098 0.8743156 1.019096 1.163107
## 001-022 19.1083615 19.0395858 18.998221 18.982808
## 001-024 3.7780247 3.7627633 NA 3.822039
## 001-027 -1.7850082 -1.7402579 -1.684847 -1.620898
## 001-044 10.2479799 10.1786281 10.105654 10.028880
## 001-056 3.6104524 3.6402340 3.666943 3.690611
## 001-063 20.4580365 20.7316590 20.963443 NA
## 001-064 2.7132570 NA 3.009631 3.197956
## 001-067 11.1802980 11.1078556 11.034733 10.960500
## 001-075 15.0383830 15.2525116 15.432585 15.575752
## 001-081 20.1667975 19.9577148 19.758334 19.568175
## 001-083 6.5437739 6.7744270 NA 7.279460
## 001-085 2.4370639 2.4914371 NA 2.661117
## 001-086 15.4687276 15.3217226 15.147036 14.945924
## 001-088 18.6395264 18.5982227 18.530299 18.439520
## 001-090 12.8303747 13.4777605 14.106619 14.712315
## 001-091 2.7300374 2.3344146 1.952296 1.587665
## 001-093 10.2499191 10.0865217 9.897907 9.687709
## 001-094 -11.5874056 -11.7885342 NA -12.022410
## 001-096 15.0428194 14.7835467 14.512852 14.232459
Jagged changes in the mean function lines seems to stem from missing data at those frames in the data set. The subjects per user-group is between 25 - 30, so missing data for one individual has a noticeable effect on mean function line.
post_right_0.fpca <- fpca.face(Y = as.matrix(right_0.post.w[, 4:602]), pve = 0.95, knots = 30, var = T)
post_right_0.fpca.pve <- round(post_right_0.fpca$evalues/sum(post_right_0.fpca$evalues)*100, 2)
post_right_scores <- as.data.frame(post_right_0.fpca$scores)
names(post_right_scores) <- c("PC1", "PC2", "PC3", "PC4")
post_right_scores$subject_id <- rownames(post_right_scores)
pt.scores <- merge(post_right_scores, pt.df,
by = c("subject_id"),
all.x = T)
pt.scores$smoker <- ifelse(pt.scores$user_cat %in% c("daily", "occasional"),1, 0)
m1 <- glm(smoker ~ PC1 + PC2 + PC3 +PC4, family = "binomial", data = pt.scores)
summary(m1)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8581 -1.1192 0.7297 0.9121 1.4154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.676904 0.244904 2.764 0.00571 **
## PC1 -0.001032 0.001596 -0.647 0.51786
## PC2 -0.006739 0.004997 -1.348 0.17751
## PC3 0.016328 0.007708 2.118 0.03414 *
## PC4 0.004515 0.009146 0.494 0.62154
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.414 on 82 degrees of freedom
## Residual deviance: 99.844 on 78 degrees of freedom
## AIC: 109.84
##
## Number of Fisher Scoring iterations: 4
pt.scores$pred <- predict(m1, pt.scores)
pt.scores.roc <- roc(response = pt.scores$smoker, predictor = pt.scores$pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.scores.roc
##
## Call:
## roc.default(response = pt.scores$smoker, predictor = pt.scores$pred)
##
## Data: pt.scores$pred in 29 controls (pt.scores$smoker 0) < 54 cases (pt.scores$smoker 1).
## Area under the curve: 0.6488
plot.roc(pt.scores.roc)
pt.wi.scores <- merge(wi.scores, pt.df,
by = "subject_id",
all.x = T)
pt.wi.scores$smoker <- ifelse(pt.wi.scores$user_cat %in% c("daily", "occasional"),1, 0)
m2 <- glm(smoker ~ PC1 + PC2 + PC3 + PC4+ PC5+ PC6, family = "binomial", data = pt.wi.scores)
summary(m2)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0000 -0.8940 0.4882 0.9015 1.5602
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.809193 0.276593 2.926 0.00344 **
## PC1 0.004114 0.001955 2.105 0.03533 *
## PC2 0.006158 0.003991 1.543 0.12278
## PC3 -0.013195 0.005702 -2.314 0.02067 *
## PC4 -0.008717 0.007334 -1.189 0.23460
## PC5 -0.014933 0.008988 -1.662 0.09661 .
## PC6 0.007149 0.010622 0.673 0.50094
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.414 on 82 degrees of freedom
## Residual deviance: 89.431 on 76 degrees of freedom
## AIC: 103.43
##
## Number of Fisher Scoring iterations: 5
pt.wi.scores$pred <- predict(m2, pt.wi.scores)
pt.wi.scores.roc <- roc(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred)
##
## Data: pt.wi.scores$pred in 29 controls (pt.wi.scores$smoker 0) < 54 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.7586
plot.roc(pt.wi.scores.roc)
summary(m1)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8581 -1.1192 0.7297 0.9121 1.4154
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.676904 0.244904 2.764 0.00571 **
## PC1 -0.001032 0.001596 -0.647 0.51786
## PC2 -0.006739 0.004997 -1.348 0.17751
## PC3 0.016328 0.007708 2.118 0.03414 *
## PC4 0.004515 0.009146 0.494 0.62154
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.414 on 82 degrees of freedom
## Residual deviance: 99.844 on 78 degrees of freedom
## AIC: 109.84
##
## Number of Fisher Scoring iterations: 4
pt.scores.roc
##
## Call:
## roc.default(response = pt.scores$smoker, predictor = pt.scores$pred)
##
## Data: pt.scores$pred in 29 controls (pt.scores$smoker 0) < 54 cases (pt.scores$smoker 1).
## Area under the curve: 0.6488
plot.roc(pt.scores.roc)
m5 <- glm(smoker ~ PC1 + PC2 + PC3 +PC4, family = "binomial", data = pt.wi.scores)
summary(m5)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8653 -1.0348 0.5976 0.9036 1.6448
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.753259 0.261977 2.875 0.00404 **
## PC1 0.003940 0.001932 2.040 0.04137 *
## PC2 0.005630 0.003762 1.496 0.13455
## PC3 -0.013747 0.005807 -2.368 0.01791 *
## PC4 -0.008183 0.007178 -1.140 0.25429
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.41 on 82 degrees of freedom
## Residual deviance: 92.67 on 78 degrees of freedom
## AIC: 102.67
##
## Number of Fisher Scoring iterations: 4
pt.wi.scores$pred_sameNPC <- predict(m5, pt.wi.scores)
pt.wi.scores.roc2 <- roc(response = pt.wi.scores$smoker,
predictor = pt.wi.scores$pred_sameNPC)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc2
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred_sameNPC)
##
## Data: pt.wi.scores$pred_sameNPC in 29 controls (pt.wi.scores$smoker 0) < 54 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.7178
plot.roc(pt.wi.scores.roc2)
pt.scores$male <- ifelse(pt.scores$demo_gender == "Male", 1, 0)
pt.scores$non_user <- ifelse(pt.scores$user_cat == "non-user", 1, 0)
pt.scores$daily <- ifelse(pt.scores$user_cat == "daily", 1, 0)
pt.scores$occasional <- ifelse(pt.scores$user_cat == "occasional", 1, 0)
pt.wi.scores$male <- ifelse(pt.wi.scores$demo_gender == "Male", 1, 0)
pt.wi.scores$non_user <- ifelse(pt.wi.scores$user_cat == "non-user", 1, 0)
pt.wi.scores$daily <- ifelse(pt.wi.scores$user_cat == "daily", 1, 0)
pt.wi.scores$occasional <- ifelse(pt.wi.scores$user_cat == "occasional", 1, 0)
chart.Correlation(pt.scores[, c("PC1", "PC2", "PC3", "PC4",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
post.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.8906 -4.1598 -0.5384 3.1860 12.9417
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.900074 0.538257 59.266 <2e-16 ***
## PC1 0.003111 0.003334 0.933 0.3536
## PC2 0.017946 0.009914 1.810 0.0741 .
## PC3 -0.017089 0.016477 -1.037 0.3029
## PC4 0.009085 0.019511 0.466 0.6428
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.904 on 78 degrees of freedom
## Multiple R-squared: 0.06603, Adjusted R-squared: 0.01814
## F-statistic: 1.379 on 4 and 78 DF, p-value: 0.249
post.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.423 -30.444 -2.416 23.011 98.854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.317841 4.309418 39.754 <2e-16 ***
## PC1 0.009978 0.026691 0.374 0.710
## PC2 0.062581 0.079375 0.788 0.433
## PC3 -0.136461 0.131916 -1.034 0.304
## PC4 -0.071798 0.156208 -0.460 0.647
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39.26 on 78 degrees of freedom
## Multiple R-squared: 0.02529, Adjusted R-squared: -0.0247
## F-statistic: 0.5059 on 4 and 78 DF, p-value: 0.7315
post.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.7848 -2.8840 -0.1095 2.9179 9.4661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.678448 0.457791 150.021 <2e-16 ***
## PC1 0.002717 0.002835 0.958 0.341
## PC2 -0.011770 0.008432 -1.396 0.167
## PC3 -0.004782 0.014013 -0.341 0.734
## PC4 -0.019140 0.016594 -1.153 0.252
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.171 on 78 degrees of freedom
## Multiple R-squared: 0.05275, Adjusted R-squared: 0.004171
## F-statistic: 1.086 on 4 and 78 DF, p-value: 0.3694
post.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.9251 -3.2857 -0.3926 2.7153 9.1396
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.537e+01 4.822e-01 52.610 <2e-16 ***
## PC1 -9.512e-05 2.987e-03 -0.032 0.9747
## PC2 1.807e-02 8.882e-03 2.034 0.0453 *
## PC3 -1.193e-02 1.476e-02 -0.808 0.4214
## PC4 8.481e-03 1.748e-02 0.485 0.6289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.393 on 78 degrees of freedom
## Multiple R-squared: 0.06117, Adjusted R-squared: 0.01303
## F-statistic: 1.271 on 4 and 78 DF, p-value: 0.2887
post.thc.m <- lm(thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.thc.m)
##
## Call:
## lm(formula = thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.766 -11.216 -8.663 0.219 111.305
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.976730 2.483730 4.822 7.01e-06 ***
## PC1 -0.010878 0.015291 -0.711 0.479
## PC2 -0.014147 0.045473 -0.311 0.757
## PC3 0.066572 0.075773 0.879 0.382
## PC4 0.009741 0.089576 0.109 0.914
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.49 on 77 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.01761, Adjusted R-squared: -0.03342
## F-statistic: 0.345 on 4 and 77 DF, p-value: 0.8467
post.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.2348 -3.2603 -0.3894 2.5759 19.2134
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.571607 0.789557 79.249 <2e-16 ***
## PC1 0.006043 0.005741 1.053 0.298
## PC2 0.025624 0.016268 1.575 0.122
## PC3 -0.024288 0.031217 -0.778 0.440
## PC4 -0.024522 0.033843 -0.725 0.472
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.595 on 49 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.0829, Adjusted R-squared: 0.008033
## F-statistic: 1.107 on 4 and 49 DF, p-value: 0.3638
post.male.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.scores)
summary(post.male.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7475 -1.2455 0.7366 1.0930 1.3211
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.355524 0.232296 1.530 0.126
## PC1 0.002563 0.001583 1.619 0.105
## PC2 -0.006042 0.005248 -1.151 0.250
## PC3 -0.003285 0.007638 -0.430 0.667
## PC4 0.006224 0.009448 0.659 0.510
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 113.02 on 82 degrees of freedom
## Residual deviance: 108.33 on 78 degrees of freedom
## AIC: 118.33
##
## Number of Fisher Scoring iterations: 4
chart.Correlation(pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
wi.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5547 -3.2751 -0.7433 2.7156 13.7134
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.882898 0.543129 58.702 <2e-16 ***
## PC1 0.001020 0.003683 0.277 0.783
## PC2 -0.010495 0.007292 -1.439 0.154
## PC3 0.004444 0.010111 0.440 0.661
## PC4 -0.017603 0.013310 -1.323 0.190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.946 on 78 degrees of freedom
## Multiple R-squared: 0.04981, Adjusted R-squared: 0.001079
## F-statistic: 1.022 on 4 and 78 DF, p-value: 0.4012
wi.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -88.967 -25.318 -3.861 19.517 100.072
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.17090 4.14600 41.286 <2e-16 ***
## PC1 0.05410 0.02812 1.924 0.058 .
## PC2 -0.07034 0.05566 -1.264 0.210
## PC3 0.07956 0.07718 1.031 0.306
## PC4 -0.15035 0.10160 -1.480 0.143
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.76 on 78 degrees of freedom
## Multiple R-squared: 0.09853, Adjusted R-squared: 0.0523
## F-statistic: 2.131 on 4 and 78 DF, p-value: 0.08475
wi.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0024 -3.2443 0.1415 2.2602 8.9463
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.655644 0.459280 149.485 <2e-16 ***
## PC1 0.002068 0.003115 0.664 0.509
## PC2 -0.006438 0.006166 -1.044 0.300
## PC3 0.004222 0.008550 0.494 0.623
## PC4 -0.016340 0.011255 -1.452 0.151
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.182 on 78 degrees of freedom
## Multiple R-squared: 0.04734, Adjusted R-squared: -0.001518
## F-statistic: 0.9689 on 4 and 78 DF, p-value: 0.4294
wi.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.716 -2.721 -1.009 2.568 8.732
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.362698 0.478822 52.969 <2e-16 ***
## PC1 0.006205 0.003247 1.911 0.0597 .
## PC2 -0.005958 0.006429 -0.927 0.3569
## PC3 0.008742 0.008914 0.981 0.3298
## PC4 -0.010993 0.011734 -0.937 0.3517
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.36 on 78 degrees of freedom
## Multiple R-squared: 0.07508, Adjusted R-squared: 0.02765
## F-statistic: 1.583 on 4 and 78 DF, p-value: 0.1872
wi.thc.m <- lm(thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.thc.m)
##
## Call:
## lm(formula = thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.249 -11.152 -6.899 -0.964 109.210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.878930 2.450826 4.847 6.37e-06 ***
## PC1 0.025407 0.016633 1.528 0.131
## PC2 0.013009 0.032717 0.398 0.692
## PC3 -0.046396 0.045499 -1.020 0.311
## PC4 0.003599 0.059717 0.060 0.952
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.18 on 77 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.04413, Adjusted R-squared: -0.005526
## F-statistic: 0.8887 on 4 and 77 DF, p-value: 0.4749
wi.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6004 -3.2229 -0.2428 2.2992 20.5976
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.725821 0.802483 76.919 <2e-16 ***
## PC1 0.003313 0.005764 0.575 0.5681
## PC2 0.013080 0.011889 1.100 0.2766
## PC3 -0.026501 0.015606 -1.698 0.0958 .
## PC4 0.004978 0.017923 0.278 0.7824
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.601 on 49 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.08099, Adjusted R-squared: 0.005971
## F-statistic: 1.08 on 4 and 49 DF, p-value: 0.3768
wi.male.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.wi.scores)
summary(wi.male.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6076 -1.1858 0.6695 1.0224 1.6609
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3578612 0.2347489 1.524 0.1274
## PC1 0.0023717 0.0016444 1.442 0.1492
## PC2 -0.0070727 0.0036153 -1.956 0.0504 .
## PC3 0.0055780 0.0049014 1.138 0.2551
## PC4 -0.0004479 0.0060586 -0.074 0.9411
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 113.02 on 82 degrees of freedom
## Residual deviance: 105.80 on 78 degrees of freedom
## AIC: 115.8
##
## Number of Fisher Scoring iterations: 4
post_step.df <- pt.scores[, c("PC1", "PC2", "PC3", "PC4", "smoker")]
m.post_step <- glm(smoker ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = post_step.df)
post_step.res <- step(m.post_step, direction = "backward")
## Start: AIC=109.84
## smoker ~ PC1 + PC2 + PC3 + PC4
##
## Df Deviance AIC
## - PC4 1 100.092 108.09
## - PC1 1 100.271 108.27
## <none> 99.844 109.84
## - PC2 1 101.890 109.89
## - PC3 1 104.646 112.65
##
## Step: AIC=108.09
## smoker ~ PC1 + PC2 + PC3
##
## Df Deviance AIC
## - PC1 1 100.47 106.47
## <none> 100.09 108.09
## - PC2 1 102.22 108.22
## - PC3 1 105.05 111.05
##
## Step: AIC=106.47
## smoker ~ PC2 + PC3
##
## Df Deviance AIC
## <none> 100.47 106.47
## - PC2 1 102.66 106.66
## - PC3 1 105.46 109.46
pt.scores$pred_post_step <- predict(post_step.res, pt.scores)
pt.scores.roc3 <- roc(response = pt.wi.scores$smoker,
predictor = pt.scores$pred_post_step)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.scores.roc3
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.scores$pred_post_step)
##
## Data: pt.scores$pred_post_step in 29 controls (pt.wi.scores$smoker 0) < 54 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.6347
plot.roc(pt.scores.roc3)
wi_step.df <- pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6", "smoker")]
m.wi_step <- glm(smoker ~ PC1 + PC2 + PC3 + PC4 +PC5 +PC6, family = "binomial", data = wi_step.df)
wi_step.res <- step(m.wi_step, direction = "backward")
## Start: AIC=103.43
## smoker ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6
##
## Df Deviance AIC
## - PC6 1 89.891 101.89
## - PC4 1 90.929 102.93
## <none> 89.431 103.43
## - PC2 1 91.916 103.92
## - PC5 1 92.423 104.42
## - PC1 1 94.339 106.34
## - PC3 1 95.949 107.95
##
## Step: AIC=101.89
## smoker ~ PC1 + PC2 + PC3 + PC4 + PC5
##
## Df Deviance AIC
## - PC4 1 91.279 101.28
## <none> 89.891 101.89
## - PC2 1 92.290 102.29
## - PC5 1 92.670 102.67
## - PC1 1 94.718 104.72
## - PC3 1 96.454 106.45
##
## Step: AIC=101.28
## smoker ~ PC1 + PC2 + PC3 + PC5
##
## Df Deviance AIC
## <none> 91.279 101.28
## - PC5 1 94.048 102.05
## - PC2 1 94.272 102.27
## - PC1 1 95.437 103.44
## - PC3 1 97.456 105.46
summary(wi_step.res)
##
## Call:
## glm(formula = smoker ~ PC1 + PC2 + PC3 + PC5, family = "binomial",
## data = wi_step.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9187 -0.9794 0.5463 0.8523 1.6837
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.763350 0.265636 2.874 0.00406 **
## PC1 0.003674 0.001872 1.963 0.04964 *
## PC2 0.006471 0.003812 1.698 0.08955 .
## PC3 -0.011865 0.005241 -2.264 0.02358 *
## PC5 -0.013869 0.008645 -1.604 0.10864
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 107.414 on 82 degrees of freedom
## Residual deviance: 91.279 on 78 degrees of freedom
## AIC: 101.28
##
## Number of Fisher Scoring iterations: 4
pt.wi.scores$pred_wi_step <- predict(wi_step.res, pt.wi.scores)
pt.wi.scores.roc4 <- roc(response = pt.wi.scores$smoker,
predictor = pt.wi.scores$pred_wi_step)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
pt.wi.scores.roc4
##
## Call:
## roc.default(response = pt.wi.scores$smoker, predictor = pt.wi.scores$pred_wi_step)
##
## Data: pt.wi.scores$pred_wi_step in 29 controls (pt.wi.scores$smoker 0) < 54 cases (pt.wi.scores$smoker 1).
## Area under the curve: 0.7484
plot.roc(pt.wi.scores.roc4)
pt.analytic.df <- pt.df[pt.df$subject_id %in% analytic.N, ]
pt.analytic.df$occasional <- ifelse(pt.analytic.df$user_cat == "occasional", 1, 0)
pt.analytic.df$daily <- ifelse(pt.analytic.df$user_cat == "daily", 1, 0)
pt.analytic.df <- pt.analytic.df[order(pt.analytic.df$subject_id), ]
right_0.post.w <- right_0.post.w[order(right_0.post.w$subject_id), ]
post_gam.df <- data.frame("subject_id" = pt.analytic.df$subject_id,
"occ_user" = pt.analytic.df$occasional,
"daily_user" = pt.analytic.df$daily,
"pct_chg" = I(as.matrix(right_0.post.w[, c(4:602)])))
m.post_gam <- pffr(pct_chg ~ occ_user + daily_user, data = post_gam.df)
summary(m.post_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## pct_chg ~ occ_user + daily_user
##
## Constant coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.6541 0.0684 -170.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Smooth terms & functional coefficients:
## edf Ref.df F p-value
## Intercept(yindex) 17.638 19.000 424.21 <2e-16 ***
## occ_user(yindex) 4.936 4.994 120.65 <2e-16 ***
## daily_user(yindex) 2.951 3.255 39.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.26 Deviance explained = 26.1%
## -REML score = 1.1837e+05 Scale est. = 56.978 n = 34385 (83 x 599)
right_0.pt.w <- right_0.pt.w[order(right_0.pt.w$subject_id), ]
wi_gam.df <- data.frame("subject_id" = pt.analytic.df$subject_id,
"occ_user" = pt.analytic.df$occasional,
"daily_user" = pt.analytic.df$daily,
"wi_pct_chg" = I(as.matrix(right_0.pt.w[, c(3:601)])))
m.wi_gam <- pffr(wi_pct_chg ~ occ_user + daily_user, data = wi_gam.df)
summary(m.wi_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## wi_pct_chg ~ occ_user + daily_user
##
## Constant coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.39565 0.08502 51.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Smooth terms & functional coefficients:
## edf Ref.df F p-value
## Intercept(yindex) 16.306 19.000 60.8 <2e-16 ***
## occ_user(yindex) 4.960 4.998 227.7 <2e-16 ***
## daily_user(yindex) 4.966 4.999 138.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.12 Deviance explained = 12.1%
## -REML score = 1.1984e+05 Scale est. = 84.514 n = 32927 (83 x 599)
2.1 Comments for Ben